1 Introduction

This section is obtained from Google SunRoof Project

Solar panels
Solar panels are made up of photovoltaic (PV) cells, which convert sunlight into direct current (DC) electricity.

Inverter
An inverter converts the DC electricity generated by solar panels into the alternating current (AC) electricity used in most homes

Monitoring System A monitoring system sends information about your solar installation, including the amount of energy it generates, to your solar provider, to maintain optimal system performance.

Utility grid
Your solar system is still connected to a local utility grid to provide power after sundown. Connection to the grid also allows your solar system to return to the grid any excess clean energy your system generates during the day. A utility meter measures your power consumption and solar output.

We explore the data as outlined in the Table Of Contents.

2 Read the Data

library(tidyverse)
library(leaflet)
library(rgdal)
library(stringr)
library(DT)
library(jsonlite)

rm(list=ls())

fillColor = "#FFA07A"
fillColor2 = "#F1C40F"

sunroof = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/sunroof_solar_potential_by_censustract.csv")

ls(sunroof)
##  [1] "carbon_offset_metric_tons"           
##  [2] "count_qualified"                     
##  [3] "existing_installs_count"             
##  [4] "install_size_kw_buckets"             
##  [5] "kw_median"                           
##  [6] "kw_total"                            
##  [7] "lat_avg"                             
##  [8] "lat_max"                             
##  [9] "lat_min"                             
## [10] "lng_avg"                             
## [11] "lng_max"                             
## [12] "lng_min"                             
## [13] "number_of_panels_e"                  
## [14] "number_of_panels_f"                  
## [15] "number_of_panels_median"             
## [16] "number_of_panels_n"                  
## [17] "number_of_panels_s"                  
## [18] "number_of_panels_total"              
## [19] "number_of_panels_w"                  
## [20] "percent_covered"                     
## [21] "percent_qualified"                   
## [22] "region_name"                         
## [23] "state_name"                          
## [24] "yearly_sunlight_kwh_e"               
## [25] "yearly_sunlight_kwh_f"               
## [26] "yearly_sunlight_kwh_kw_threshold_avg"
## [27] "yearly_sunlight_kwh_median"          
## [28] "yearly_sunlight_kwh_n"               
## [29] "yearly_sunlight_kwh_s"               
## [30] "yearly_sunlight_kwh_total"           
## [31] "yearly_sunlight_kwh_w"

3 Get Percentage of Non Null Values

In this table below, only the Lowest Twenty Percentage Values are shown.

GetPercentageOfNonNullValues <- function(OurDataSet) {
  
  PercentageOfNonNullValues = ( colSums(!is.na(OurDataSet)) / nrow(OurDataSet) ) * 100
  
  PercentageOfNonNullValues = as.data.frame(PercentageOfNonNullValues)
  PercentageOfNonNullValues = rownames_to_column(PercentageOfNonNullValues,"VariableName")
  
  PercentageOfNonNullValues =  PercentageOfNonNullValues %>% arrange(desc(PercentageOfNonNullValues))
  
  PercentageOfNonNullValues = PercentageOfNonNullValues %>%
    mutate(VariableName =  reorder(VariableName,PercentageOfNonNullValues)) %>% tail(20)
  
datatable(PercentageOfNonNullValues, style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))

}


GetPercentageOfNonNullValues(sunroof)

We observe all the variables have more than 99% values. Looking forward for an interesting analysis.

4 Top Twenty States with 100 Percentage Qualified for Solar Roof Top

We find the states which has regions with Hundred percentage Qualification for Solar Roof Top. In the below bar plot, we show the states which has the Maximum number of such regions.

sunroof$percent_qualified = as.numeric(sunroof$percent_qualified)

sunroof %>%
  filter(percent_qualified == 100) %>%
  group_by(state_name) %>%
  tally() %>%
  mutate(state_name = reorder(state_name,n)) %>%
  arrange(desc(n)) %>%
  head(20) %>%
  
  ggplot(aes(x = state_name,y = n)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  geom_text(aes(x = state_name, y = 1, label = paste0("(",n,")",sep="")),
            hjust=0, vjust=.5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'State', y = 'Count Hundred Percent Qualified', 
       title = 'States with Regions Hundred Percent Qualified for Solar') +
  coord_flip() + 
  theme_bw()

5 Map of States with 100 Percentage Qualified for Solar Roof Top

This choropleth shows the No of Regions in each state which is 100 Percentage Qualified for Solar Roof Top.

StateCoverageRegions = sunroof %>%
  filter(percent_qualified == 100) %>%
  group_by(state_name) %>%
  tally() %>%
  mutate(state_name = reorder(state_name,n))

states <- readOGR(dsn = "C:/Users/ddalv/Documents/Courses/Capstone/us-states-cartographic-boundary-shapefiles/cb_2016_us_state_500k.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\ddalv\Documents\Courses\Capstone\us-states-cartographic-boundary-shapefiles\cb_2016_us_state_500k.shp", layer: "cb_2016_us_state_500k"
## with 56 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
statesOriginal = states

StateCoverageRegions$state_name = as.character(StateCoverageRegions$state_name)

states@data = left_join(states@data, StateCoverageRegions, by = c("NAME" = "state_name"))

bins = c(0,5, 10, 15, 20, 25,35, 45)

pal = colorBin("YlOrRd", domain = states@data$n, bins = bins)

labels = sprintf(
  "<strong>%s</strong><br/>%g",
  states@data$NAME, states@data$n
) %>% lapply(htmltools::HTML)

leaflet(data = states) %>%  setView(-96, 37.8, 4) %>% 
  addPolygons(
  fillColor = ~pal(n),
  weight = 2,
  opacity = 1,
  color = "white",
  dashArray = "3",
  fillOpacity = 0.7,
  highlight = highlightOptions(
    weight = 5,
    color = "#666",
    dashArray = "",
    fillOpacity = 0.7,
    bringToFront = TRUE),
  label = labels,
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")) %>%
  addLegend(pal = pal, values = ~n, opacity = 0.7, title = "#Post Codes 100 %age Qualified for Solar Roof Top",
            position = "bottomleft")

We observe that the States of New York , California , Texas , Pennsylvania , Illinois have very High Number of Post Codes with Hundred Percent Qualification for Solar.

6 Counties in NY With Percent Qualified for Solar

We observe that New York has the Highest number of regions with Hundred Percent Qualifications for Solar. Therefore we now go deeper and examine the Counties in New York and show in a bar plot the counties and their Percentage Qualification for Solar.

CountyData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-county-09082017.csv")

NYCountyData = CountyData %>%
  filter(state_name == "New York") %>%
  arrange(desc(percent_qualified)) %>%
  mutate(region_name =  reorder(region_name,percent_qualified))

NYCountyData %>%
  head(20) %>%
  
  ggplot(aes(x = region_name,y = percent_qualified)) +
  geom_bar(stat='identity',colour="white", fill =fillColor) +
  geom_text(aes(x = region_name, y = 1, label = paste0("(",round(percent_qualified),"%",")",sep="")),
            hjust=0, vjust=.5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'County Name', y = 'Percent Qualified for Solar', 
       title = 'Counties in NY With Percent Qualified for Solar') +
  coord_flip() + 
  theme_bw()

7 States and Carbon Offset in Million Metric Tons

If you are not familiar with the Carbon Offset, I have the following information from Wikipedia.

A carbon offset is a reduction in emissions of carbon dioxide or greenhouse gases made in order to compensate for or to offset an emission made elsewhere.

Carbon offsets are measured in metric tons of carbon dioxide-equivalent (CO2e) and may represent six primary categories of greenhouse gases:[5] carbon dioxide (CO2), methane (CH4), nitrous oxide (N2O), perfluorocarbons (PFCs), hydrofluorocarbons (HFCs), and sulfur hexafluoride (SF6).[6] One carbon offset represents the reduction of one metric ton of carbon dioxide or its equivalent in other greenhouse gases.

CarbonOffsetState = sunroof %>%
  group_by(state_name) %>%
  summarise(CarbonOffset = sum(carbon_offset_metric_tons)) %>%
  arrange(desc(CarbonOffset)) %>%
  mutate(state_name = reorder(state_name,CarbonOffset))


CarbonOffsetState %>%
  
  head(20)%>%
  
  ggplot(aes(x = state_name,y = CarbonOffset)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  geom_text(aes(x = state_name, y = 1, label = paste0("(",round(CarbonOffset/1000000)," M ",")",sep="")),
            hjust=0, vjust=.5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'State', y = 'Carbon Offset', 
       title = 'States and Carbon Offset in Million Metric Tons') +
  coord_flip() + 
  theme_bw()

8 Map of States for Carbon Offset Potential

We examine the states for Carbon Offset Potential through a choropleth.

CarbonOffsetState = CarbonOffsetState %>% 
  mutate(CarbonOffsetInMillions = CarbonOffset/1000000)

states = statesOriginal

states@data = left_join(states@data, CarbonOffsetState, by = c("NAME" = "state_name"))


bins <- c(0,7, 15, 20, 30,40,50, 55,60)

pal <- colorBin("YlOrRd", domain = states@data$CarbonOffsetInMillions, bins = bins)

labels <- sprintf(
  "<strong>%s</strong><br/>%g",
  states@data$NAME, states@data$CarbonOffsetInMillions
) %>% lapply(htmltools::HTML)

leaflet(data = states) %>%  setView(-96, 37.8, 4) %>% 
  addPolygons(
    fillColor = ~pal(CarbonOffsetInMillions),
    weight = 2,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    highlight = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE),
    label = labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>%
  addLegend(pal = pal, values = ~CarbonOffsetInMillions, opacity = 0.7, title = "Carbon Offset Potential in Millions Metric Tons",
            position = "bottomleft")

We observe that Texas , Florida and California have very high potential for Carbon Offset.

9 Examine Texas Counties for Carbon Offset

We show the Choropleth for the Texas Counties with the Carbon Offset Data

USCounties <- readOGR(dsn = "C:/Users/ddalv/Documents/Courses/Capstone/us-states-cartographic-boundary-shapefiles/cb_2016_48_cousub_500k.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\ddalv\Documents\Courses\Capstone\us-states-cartographic-boundary-shapefiles\cb_2016_48_cousub_500k.shp", layer: "cb_2016_48_cousub_500k"
## with 862 features
## It has 10 fields
## Integer64 fields read as strings:  ALAND AWATER
CountyData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-county-09082017.csv")

TexasCountyData = CountyData %>%
  filter(state_name == "Texas") %>%
  group_by(region_name) %>%
  summarise(CarbonOffset = sum(carbon_offset_metric_tons)/1000) %>%
  arrange(desc(CarbonOffset)) %>%
  mutate(region_name =  reorder(region_name,CarbonOffset))


TexasCounties2 = sapply(str_split(TexasCountyData$region_name," "), head, 1)
TexasCountyData$region_name = TexasCounties2

USCounties@data$NAME = as.character(USCounties@data$NAME)

USCounties@data = inner_join(USCounties@data, TexasCountyData, by = c("NAME" = "region_name")) 

bins <- c(0,500,1000, 2000, 3000, 5000, 6000,9000, 13000)

pal <- colorBin("YlOrRd", domain = USCounties@data$CarbonOffset, bins = bins)

labels <- sprintf(
  "<strong>%s</strong><br/>%g",
  USCounties@data$NAME, USCounties@data$CarbonOffset
) %>% lapply(htmltools::HTML)


leaflet(data = USCounties) %>% setView(-98.613281, 31.203405, 6)  %>%
  addPolygons(
    fillColor = ~pal(CarbonOffset),
    weight = 2,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.7,
    highlight = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE),
    label = labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>%
  addLegend(pal = pal, values = ~CarbonOffset, opacity = 0.7, title = "Carbon Offset in '000 metric Tons",
            position = "bottomright")

10 Number of Buildings for Various Installation Size Buckets in Houston

10.1 Installation Size Buckets <= 50kw

CityData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-city-09082017.csv")

HoustonCity = CityData %>%
  filter(region_name == "Houston") %>%
  filter(state_name == "Texas")


HoustonBuckets = jsonlite::fromJSON(HoustonCity$install_size_kw_buckets_json, simplifyDataFrame = TRUE)

Buckets = data.frame(BucketName = character(), BucketKW = numeric())

for( counter in  1:10)
{
  BucketName = paste0(HoustonBuckets[counter,1],"-",HoustonBuckets[counter+1,1])
  
  BucketRow  = data.frame(BucketName = BucketName, BucketKW = HoustonBuckets[counter,2])
  
  Buckets  = rbind(Buckets,BucketRow)
  
  
}

Buckets %>%
  
  ggplot(aes(x = BucketName,y = BucketKW)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  labs(x = 'Installation Size in kW', y = 'Count of Buildings', 
       title = 'Rooftop solar capacity distribution (number of roofs, < 50kW)') +
   theme_bw()

10.2 Installation Size Buckets 50kW-1MW

Buckets = data.frame(BucketName = character(), BucketKW = numeric())

BucketSize = 50

counter = 11

while( BucketSize <= 1000)
{
  BucketName = paste0(HoustonBuckets[counter,1],"-",HoustonBuckets[counter+10,1])
  BucketKW = 0
  counter2 = counter +10
  while( counter < counter2)
  {
    BucketKW = BucketKW + as.numeric(HoustonBuckets[counter,2])
    
    counter = counter + 1
  }
  BucketRow  = data.frame(BucketName = BucketName, BucketKW = BucketKW)
  
  if( BucketSize < 1000)
  {
    BucketSize = HoustonBuckets[counter+10,1]
  }
  else
  {
    BucketSize = 1001
  }
  
  
  Buckets  = rbind(Buckets,BucketRow)
  
  
}

Buckets %>%
  
  ggplot(aes(x = BucketName,y = BucketKW)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  labs(x = 'Installation Size in kW', y = 'Count of Buildings', 
       title = 'Rooftop solar capacity distribution (number of roofs,  50kW-1MW)') +
  theme_bw() + theme(axis.text.x  = element_text(angle=90, vjust=0.5, size=10))

11 Yearly Solar energy generation potential for different roof faces in Houston

Description of the different roof faces and the total solar energy generation is shown below

Type Description
yearly_sunlight_kwh_f total solar energy generation potential for flat roof space in that region
yearly_sunlight_kwh_n total solar energy generation potential for north roof space in that region
yearly_sunlight_kwh_e total solar energy generation potential for east roof space in that region
yearly_sunlight_kwh_s total solar energy generation potential for south roof space in that region
yearly_sunlight_kwh_w total solar energy generation potential for west roof space in that region
HoustonCity2 = gather(HoustonCity, key = Type, value = measurement,yearly_sunlight_kwh_f,
                                      yearly_sunlight_kwh_n,
                      yearly_sunlight_kwh_s, yearly_sunlight_kwh_e, yearly_sunlight_kwh_w)

HoustonCity2 = HoustonCity2 %>%
  select(Type,measurement) %>%
  mutate( Type = reorder(Type,measurement))



HoustonCity2 %>%
  ggplot(aes(x = Type,y = measurement)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  labs(x = 'Roof Face Type', y = 'Measurement in kwh', 
       title = 'Total solar energy generation potential in different Directions') +
  coord_flip() +
  theme_bw()

12 Number of Solar Panels potential for different roof faces in Houston

Description of the different roof faces and the total solar energy generation is shown below

Type Description
number_of_panels_f # of solar panels potential for flat roof space in that region, assuming 1.650m x 0.992m panels
number_of_panels_n # of solar panels potential for north roof space in that region, assuming 1.650m x 0.992m panels
number_of_panels_e # of solar panels potential for east roof space in that region, assuming 1.650m x 0.992m panels
number_of_panels_s # of solar panels potential for south roof space in that region, assuming 1.650m x 0.992m panels
number_of_panels_w # of solar panels potential for west roof space in that region, assuming 1.650m x 0.992m panels
HoustonCity2 = gather(HoustonCity, key = Type, value = measurement,
                      number_of_panels_f,number_of_panels_n,
                      number_of_panels_s, number_of_panels_e, 
                      number_of_panels_w)

HoustonCity2 = HoustonCity2 %>%
  select(Type,measurement) %>%
  mutate( Type = reorder(Type,measurement))



HoustonCity2 %>%
  ggplot(aes(x = Type,y = measurement)) +
  geom_bar(stat='identity',colour="white", fill =fillColor) +
  labs(x = 'Direction of Panels', y = 'Number of Potential Panels', 
       title = 'Number of Potential Panels in different Directions') +
  coord_flip() +
  theme_bw()

13 Carbon Offset Potential of Cities

We examine the Carbon Offset Potential of the Cities and plot the Top Twenty in a bar plot.

CarbonOffsetCities = CityData %>%
  arrange(desc(carbon_offset_metric_tons)) %>%
  head(20)

CarbonOffsetCities %>%
  mutate(region_name = reorder(region_name,carbon_offset_metric_tons)) %>%
 ggplot(aes(x = region_name,y = carbon_offset_metric_tons)) +
  geom_bar(stat='identity',colour="white", fill =fillColor2) +
  geom_text(aes(x = region_name, y = 1, 
                label = paste0("(",round( (carbon_offset_metric_tons/10e+3),2),")",sep="")),
            hjust=0, vjust=.5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Cities', y = 'Carbon Offset Potential in Thousands Metric Tons', 
       title = 'Cities and Carbon Offset Potential in Thousands Metric Tons') +
  coord_flip() + 
  theme_bw()

14 Map of Carbon Offset Potential of Cities

We plot the map of Top Fifty Cities which have high Carbon Offset Potential.

CarbonOffsetCities = CityData %>%
  arrange(desc(carbon_offset_metric_tons)) %>%
  head(50)

center_lon = median(CarbonOffsetCities$lng_avg)
center_lat = median(CarbonOffsetCities$lat_avg)

leaflet(CarbonOffsetCities) %>% addProviderTiles("Esri.NatGeoWorldMap") %>%
  addCircles(lng = ~lng_avg, lat = ~lat_avg,radius = ~sqrt(carbon_offset_metric_tons)*30, 
             color = c("red"))  %>%
  # controls
  setView(lng=center_lon, lat=center_lat, zoom=4)